import plotly.graph_objects as go
import pandas as pd
class SankeyData:
    """Collect Tazer block statistics and expose them as a Plotly Sankey graph.

    In Plotly's Sankey vocabulary a *node* is a vertex and a *link* is an
    edge.  Every task and every file passed to :meth:`load_stat` becomes one
    node; :meth:`build_links` then derives one link per loaded stat whose
    value is the number of distinct blocks in that stat.

    Attributes:
        df_stat: list of dicts (``df``, ``filename``, ``type``, ``size``,
            ``task_name``), one per :meth:`load_stat` call.
        nodes: DataFrame with at least the columns ``label`` and ``color``.
        links: DataFrame with the columns ``source``, ``target``, ``value``;
            ``source``/``target`` are row positions in ``nodes``.
    """

    # Node colour for each kind of node.
    color_map = {"task": "red",
                 "file": "blue",
                 "none": "grey"
                 }
    # Divisor that turns a stat's 'size' into a block count
    # (presumably bytes per block -- confirm against the Tazer configuration).
    block_size = 1024

    _NODE_COLUMNS = ['label', 'color']
    _LINK_COLUMNS = ['source', 'target', 'value']
    # Labels of the shared placeholder nodes used by build_links(no_rw=True).
    _NO_READ_LABEL = 'no read'
    _NO_WRITE_LABEL = 'no write'

    def __init__(self):
        # State is per instance: a class-level list/DataFrame would be shared
        # (and mutated) by every SankeyData object.
        self._init_state()

    def _init_state(self):
        """Set (or restore) the empty per-instance containers."""
        # From Tazer
        self.df_stat = []
        self.nodes = pd.DataFrame(columns=self._NODE_COLUMNS)
        self.links = pd.DataFrame(columns=self._LINK_COLUMNS)

    def _node_index(self, label):
        """Return the row position of the first node labelled *label*.

        Raises:
            KeyError: if no node carries that label.
        """
        matches = self.nodes.loc[self.nodes['label'] == label].index
        if len(matches) == 0:
            raise KeyError(f"no node labelled {label!r}")
        return int(matches[0])

    def _ensure_node(self, label, kind):
        """Add a node labelled *label* unless one exists; return its index.

        *kind* is a key of ``color_map`` and selects the node colour.
        """
        if not (self.nodes['label'] == label).any():
            self.add_node({'label': label,
                           'color': self.color_map[kind]})
        return self._node_index(label)

    def load_stat(self, df_stat, params):
        """Register one stat table together with its task and file nodes.

        Args:
            df_stat: DataFrame holding a ``block_idx`` column (read later by
                :meth:`build_links`).
            params: dict with the keys ``filename``, ``type`` (``'r'`` for a
                read; any other value is treated as a write), ``size`` and
                ``task_name``.

        Raises:
            KeyError: if *params* lacks one of the keys above.
        """
        self.df_stat.append({"df": df_stat,
                             "filename": params['filename'],
                             "type": params['type'],
                             "size": params['size'],
                             "task_name": params['task_name']
                             })
        # add a node entry (task)
        self.set_taskname(params['task_name'])
        # add a node entry (input/output file); one entry per filename.
        # The 'no read' / 'no write' placeholder nodes are created on demand
        # by build_links(no_rw=True) rather than here.
        self._ensure_node(params['filename'], 'file')

    def set_taskname(self, name):
        """Add a task node labelled *name* unless it already exists."""
        self._ensure_node(name, 'task')

    def get_node(self, idx):
        """Return a node row by position, or a node column by name.

        An ``int`` that is not itself a column label selects the row at that
        position (the same numbering ``links`` uses); any other key is looked
        up as a column of ``nodes``.
        """
        if isinstance(idx, int) and idx not in self.nodes.columns:
            return self.nodes.iloc[idx]
        return self.nodes[idx]

    def add_node(self, node_dict):
        """Append a node and return its row position.

        Args:
            node_dict: mapping with ``label`` and ``color``; further keys
                (e.g. customdata, x, y) become additional columns.
        """
        # label, color, customdata, x, y
        row = pd.DataFrame([node_dict])
        self.nodes = pd.concat([self.nodes, row], ignore_index=True)
        return len(self.nodes) - 1

    def get_link(self, name):
        """Return the column *name* of the ``links`` table."""
        return self.links[name]

    def set_links(self):
        """Placeholder; currently does nothing."""
        pass

    def build_links(self, no_rw=False):
        """Rebuild ``links`` from every stat loaded so far.

        For a read stat the link runs file -> task, otherwise task -> file;
        its value is the number of distinct ``block_idx`` entries.

        Args:
            no_rw: when true, also emit a link for the untouched blocks
                (``size / block_size`` minus the distinct block count):
                'no read' -> task for reads, task -> 'no write' for writes.
                The two placeholder nodes are added to ``nodes`` if missing.

        Raises:
            KeyError: if a stat's file or task has no node.
        """
        links = []
        key = 'block_idx'
        if no_rw:
            # Look the placeholders up by label; a fixed offset from the file
            # index would hit whichever node happens to follow the file.
            no_read_idx = self._ensure_node(self._NO_READ_LABEL, 'none')
            no_write_idx = self._ensure_node(self._NO_WRITE_LABEL, 'none')
        for v in self.df_stat:
            cnt = v['df'][key].nunique()
            fidx = self._node_index(v['filename'])
            tidx = self._node_index(v['task_name'])
            untouched = (v['size'] / self.block_size) - cnt if no_rw else None
            if v['type'] == "r":
                # input (r): blocks flow from the file into the task
                links.append({'source': fidx,
                              'target': tidx,
                              'value': cnt})
                if no_rw:
                    links.append({'source': no_read_idx,
                                  'target': tidx,
                                  'value': untouched})
            else:
                # output (w): blocks flow from the task into the file
                links.append({'source': tidx,
                              'target': fidx,
                              'value': cnt})
                if no_rw:
                    links.append({'source': tidx,
                                  'target': no_write_idx,
                                  'value': untouched})
        # Explicit columns keep the table well-formed when nothing is loaded.
        self.links = pd.DataFrame(links, columns=self._LINK_COLUMNS)

    def plot(self):
        """Show the Sankey diagram for the current ``nodes`` and ``links``."""
        n = self.nodes[['label', 'color']].to_dict('list')
        l = self.links.to_dict('list')
        fig = go.Figure(data=[go.Sankey(
            node=n,
            link=l)])
        fig.show()

    def reset(self):
        """Drop all loaded stats, nodes and links; the object stays usable."""
        self._init_state()
import pandas as pd
# loading tazer stat files into pandas dataframe
def stat_to_df(fname):
    """Load one Tazer stat file into a pandas DataFrame.

    The file is parsed as space-separated text; its first line is skipped and
    the remaining columns are named ``block_idx`` and ``frequency``.
    """
    column_names = ['block_idx', 'frequency']
    return pd.read_csv(fname, sep=' ', names=column_names, skiprows=1)
!ls tazer_stat/*/
tazer_stat/aggregate.py/: aggregated.h5_r_stat task0001 task0004 aggregated.h5_w_stat task0002 task0005 task0000 task0003 tazer_stat/sim_emulator.py/: task0000 task0001 task0002 task0003 task0004 task0005
!ls tazer_stat/*/*
tazer_stat/aggregate.py/aggregated.h5_r_stat tazer_stat/aggregate.py/aggregated.h5_w_stat tazer_stat/aggregate.py/task0000: residue_100_output.h5_r_stat residue_100_output.h5_w_stat tazer_stat/aggregate.py/task0001: residue_100_output.h5_r_stat residue_100_output.h5_w_stat tazer_stat/aggregate.py/task0002: residue_100_output.h5_r_stat residue_100_output.h5_w_stat tazer_stat/aggregate.py/task0003: residue_100_output.h5_r_stat residue_100_output.h5_w_stat tazer_stat/aggregate.py/task0004: residue_100_output.h5_r_stat residue_100_output.h5_w_stat tazer_stat/aggregate.py/task0005: residue_100_output.h5_r_stat residue_100_output.h5_w_stat tazer_stat/sim_emulator.py/task0000: residue_100_output.h5_r_stat residue_100_output.h5_w_stat tazer_stat/sim_emulator.py/task0001: residue_100_output.h5_r_stat residue_100_output.h5_w_stat tazer_stat/sim_emulator.py/task0002: residue_100_output.h5_r_stat residue_100_output.h5_w_stat tazer_stat/sim_emulator.py/task0003: residue_100_output.h5_r_stat residue_100_output.h5_w_stat tazer_stat/sim_emulator.py/task0004: residue_100_output.h5_r_stat residue_100_output.h5_w_stat tazer_stat/sim_emulator.py/task0005: residue_100_output.h5_r_stat residue_100_output.h5_w_stat
# Number of per-task stat directories to load for each script.
task_cnt = 6

# Read (r) and write (w) block statistics, one DataFrame per task.
sim_df_r = [stat_to_df(f'tazer_stat/sim_emulator.py/task000{i}/residue_100_output.h5_r_stat')
            for i in range(task_cnt)]
sim_df_w = [stat_to_df(f'tazer_stat/sim_emulator.py/task000{i}/residue_100_output.h5_w_stat')
            for i in range(task_cnt)]
agg_df_r = [stat_to_df(f'tazer_stat/aggregate.py/task000{i}/residue_100_output.h5_r_stat')
            for i in range(task_cnt)]
agg_df_w = [stat_to_df(f'tazer_stat/aggregate.py/task000{i}/residue_100_output.h5_w_stat')
            for i in range(task_cnt)]

# Statistics of the aggregated.h5 file handled by aggregate.py.
agg_last_df_r = stat_to_df('tazer_stat/aggregate.py/aggregated.h5_r_stat')
agg_last_df_w = stat_to_df('tazer_stat/aggregate.py/aggregated.h5_w_stat')

sd = SankeyData()

# Register the stats task by task; the call order fixes the node numbering.
for i in range(task_cnt):
    residue_label = f'residue_100_output.h5 ({i})'
    per_task_stats = (
        (sim_df_r[i], 'r', 'sim_emulator.py'),
        (sim_df_w[i], 'w', 'sim_emulator.py'),
        (agg_df_r[i], 'r', 'aggregate.py'),
        (agg_df_w[i], 'w', 'aggregate.py'),
    )
    for frame, io_type, task in per_task_stats:
        sd.load_stat(frame, {'filename': residue_label,
                             'type': io_type,
                             'size': 10727328,
                             'task_name': task})

for frame, io_type in ((agg_last_df_r, 'r'), (agg_last_df_w, 'w')):
    sd.load_stat(frame, {'filename': 'aggregated.h5',
                         'type': io_type,
                         'size': 63502078,
                         'task_name': 'aggregate.py'})

sd.build_links()
# Notebook cell: display the node table (label, color) filled in by load_stat.
sd.nodes
| | label | color |
|---|---|---|
| 0 | sim_emulator.py | red |
| 1 | residue_100_output.h5 (0) | blue |
| 2 | aggregate.py | red |
| 3 | residue_100_output.h5 (1) | blue |
| 4 | residue_100_output.h5 (2) | blue |
| 5 | residue_100_output.h5 (3) | blue |
| 6 | residue_100_output.h5 (4) | blue |
| 7 | residue_100_output.h5 (5) | blue |
| 8 | aggregated.h5 | blue |
# Notebook cell: display the link table (source, target, value) produced by build_links.
sd.links
| | source | target | value |
|---|---|---|---|
| 0 | 1 | 0 | 5 |
| 1 | 0 | 1 | 10479 |
| 2 | 1 | 2 | 10479 |
| 3 | 2 | 1 | 73 |
| 4 | 3 | 0 | 5 |
| 5 | 0 | 3 | 10488 |
| 6 | 3 | 2 | 10488 |
| 7 | 2 | 3 | 73 |
| 8 | 4 | 0 | 5 |
| 9 | 0 | 4 | 10485 |
| 10 | 4 | 2 | 10485 |
| 11 | 2 | 4 | 73 |
| 12 | 5 | 0 | 5 |
| 13 | 0 | 5 | 10459 |
| 14 | 5 | 2 | 10459 |
| 15 | 2 | 5 | 73 |
| 16 | 6 | 0 | 5 |
| 17 | 0 | 6 | 10477 |
| 18 | 6 | 2 | 10477 |
| 19 | 2 | 6 | 73 |
| 20 | 7 | 0 | 5 |
| 21 | 0 | 7 | 10446 |
| 22 | 7 | 2 | 10446 |
| 23 | 2 | 7 | 73 |
| 24 | 8 | 2 | 2 |
| 25 | 2 | 8 | 62015 |
# Notebook cell: render the Sankey figure from sd.nodes and sd.links.
sd.plot()